/**
* @Title: BaiduCrawler.java
* @Package org.zsen.crawler
* @Description Entry point that crawls http://top.baidu.com using WebCollector's MultiExtractorCrawler.
* @author ZhangSen
* @date 2015-11-01 19:11:27
* @version 
*/
package org.zsen.crawler;

import org.zsen.crawler.BaiduExtractor.HotPointExtractor;

import cn.edu.hfut.dmic.webcollector.crawler.MultiExtractorCrawler;

/**
 * Command-line entry point that configures a {@link MultiExtractorCrawler}
 * for the Baidu "top" site and starts it.
 *
 * <p>Pages whose URL matches {@link #BUZZ_PAGE_PATTERN} are registered with
 * {@link HotPointExtractor}.
 *
 * @author ZhangSen
 * @date 2015-11-01 19:11:27
 */
public class BaiduCrawler {

	/**
	 * First constructor argument of the crawler.
	 * NOTE(review): presumably the crawl storage path/identifier - confirm against WebCollector.
	 */
	private static final String CRAWL_NAME = "crawl";

	/**
	 * Second constructor argument of the crawler.
	 * NOTE(review): meaning is not visible in this file - confirm against WebCollector.
	 */
	private static final boolean CRAWLER_FLAG = true;

	/** URL the crawl is seeded with. */
	private static final String SEED_URL = "http://top.baidu.com";

	/** Regex registered through {@code addRegex}. */
	private static final String SITE_PATTERN = "http://top.baidu.com/.*";

	/** URL regex for which {@link HotPointExtractor} is registered. */
	private static final String BUZZ_PAGE_PATTERN = "http://top.baidu.com/buzz.*";

	/** Value passed to {@code setThreads}. */
	private static final int THREAD_COUNT = 5;

	/** Value passed to {@code setTopN}. */
	private static final int TOP_N = 100;

	/**
	 * Argument passed to {@code start}.
	 * NOTE(review): presumably the crawl depth - confirm against WebCollector.
	 */
	private static final int START_ARGUMENT = 3;

	/**
	 * Builds the crawler and starts it. Any exception thrown by
	 * {@code start} is caught and its stack trace is printed; exceptions
	 * raised while configuring the crawler are not caught here.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		final MultiExtractorCrawler baiduCrawler = newConfiguredCrawler();
		try {
			baiduCrawler.start(START_ARGUMENT);
		} catch (Exception startFailure) {
			startFailure.printStackTrace();
		}
	}

	/**
	 * Creates the crawler and applies the seed, URL regex, extractor,
	 * thread count and top-N settings, in that order.
	 *
	 * @return the configured, not yet started crawler
	 */
	private static MultiExtractorCrawler newConfiguredCrawler() {
		final MultiExtractorCrawler configured = new MultiExtractorCrawler(CRAWL_NAME, CRAWLER_FLAG);
		configured.addSeed(SEED_URL);
		configured.addRegex(SITE_PATTERN);
		configured.addExtractor(BUZZ_PAGE_PATTERN, HotPointExtractor.class);
		configured.setThreads(THREAD_COUNT);
		configured.setTopN(TOP_N);
		return configured;
	}
}
